


import json, os, re 


def get_sua_from_text(string: str) -> "tuple[str | None, str | None, str | None]":
    """Extract the system, user and assistant messages from a ChatML-style text.

    The text is expected to mark each turn as
    ``<|im_start|>{role}\\n{content}<|im_end|>``. Only the first user turn and
    the first assistant reply that follows it are returned; the special
    tokens (``<|im_start|>``, ``<|im_end|>``, ``<|endoftext|>``) and the
    surrounding whitespace are removed from every returned message.

    Args:
        string: The full conversation text.

    Returns:
        A ``(system, user, assistant)`` tuple. An element is ``None`` when the
        corresponding role marker does not occur in ``string`` (for example
        the assistant part of a prompt-only text).
    """
    im_start, im_end = '<|im_start|>', '<|im_end|>'
    user_tag = im_start + 'user\n'
    assistant_tag = im_start + 'assistant\n'

    # Everything before the first user marker is the (optional) system part;
    # the piece right after it is the first user turn plus its reply.
    before_user, *after_user = string.split(user_tag)
    if after_user:
        system_part = before_user
        user_part, *replies = after_user[0].split(assistant_tag)
    else:
        # No user turn at all: an assistant reply may still follow the
        # system part directly, so look for it there instead of failing.
        user_part = None
        system_part, *replies = before_user.split(assistant_tag)
    assistant_part = replies[0] if replies else None

    system = None
    # The presence check deliberately omits the trailing newline, so a
    # system marker without one is still recognised.
    if im_start + 'system' in string:
        system = (
            system_part.replace(im_start + 'system\n', '')
            .replace(im_start, '')
            .replace(im_end, '')
            .strip()
        )

    user = None
    if user_part is not None:
        user = user_part.replace(im_end, '').strip()

    assistant = None
    if assistant_part is not None:
        assistant = (
            assistant_part.replace(im_end, '')
            .replace('<|endoftext|>', '')
            .strip()
        )

    return (system, user, assistant)



if __name__ == '__main__':
    # Manual smoke test: a conversation with no system prompt, one user turn
    # and one assistant reply; the extracted tuple is printed.
    sample_text = (
        '<|im_start|>user\n你好<|im_end|>'
        '<|im_start|>assistant\n你也好<|im_end|><|endoftext|>'
    )
    print(get_sua_from_text(sample_text))
    